/********************************************************************/
/* This file creates the base dataset used for much of the analysis */
/********************************************************************/

/* David Sturrock, May 2016, updated October 2019 */

* Start from a clean session
clear all
* Do not pause long output with a more prompt
set more off
* Raise the ceiling on the number of variables (Step 1 builds several thousand intermediate columns)
set maxvar 30000


/***************************************************************/
/* Step 1: Get and arrange ONS data on probability of survival */
/***************************************************************/

** By age/sex/wave **
* For each sex, each wave (1-8) and each current age (one row per age), compute
* the percentage chance of surviving from the current age to every target age
* 50-110 from the ONS cohort lx tables:
*     prob<target>_w<wave> = 100 * lx(target age) / lx(current age)
* where both lx values are read from the cohort born in (wave year - current age).
* Males (sex=1) and females (sex=2) go through the same loop; results are saved
* per sex and then stacked into probsurvive.dta.

	local sexcode = 0
	foreach sheetsex in Males Females {
		local ++sexcode
		local sfx = cond(`sexcode'==1, "m", "f")

		import excel using "$onsdata\ONS life tables 1841 onwards.xls", sheet("`sheetsex' cohort lx") cellrange(A9:DN120) firstrow clear
		rename ageyears age
		* Columns B-DN are renamed cohort1900 ... cohort2016, one per birth cohort
		local cohort=1899
		foreach var of varlist B-DN {
			local cohort=`cohort'+1
			rename `var' cohort`cohort'
		}
		keep age cohort*

		foreach wave in 1 2 3 4 5 6 7 8 {
			local year=2000+2*`wave'

			* Birth year implied by the row's age in this wave's year, and that
			* cohort's lx at the row's own age. Neither depends on the target
			* age, so both are computed once per wave.
			gen birth_yearw`wave' = `year'-age
			gen initialw`wave' = .
			forv yob = 1900(1)2016 {
				replace initialw`wave' = cohort`yob' if birth_yearw`wave' == `yob'
			}

			forv age = 50(1)110 {
				gen survive_`age'w`wave' = .
				forv yob = 1900(1)2016 {
					* lx of this cohort at the target age, copied onto every row
					gen lx_target = cohort`yob' if age == `age'
					egen lx_spread = min(lx_target)
					replace survive_`age'w`wave' = lx_spread if birth_yearw`wave' == `yob'
					drop lx_target lx_spread
				}
				gen prob`age'_w`wave' = survive_`age'w`wave'/initialw`wave'*100
				* No probability is defined for target ages already passed
				replace prob`age'_w`wave' = . if age>`age'
			}
		}
		keep age prob*

		* Go from one column per target age and wave to one row per age and wave
		local stubs
		forv age = 50(1)110 {
			local stubs `stubs' prob`age'_w
		}
		reshape long `stubs', i(age) j(wave)

		gen sex = `sexcode'
		rename *_w *
		sort age wave

		save "$temp\probsurvive_`sfx'.dta", replace
	}

	* The female file is still in memory: add the male rows and save the combined file
	append using "$temp\probsurvive_m.dta"
	sort wave sex age
	save "$temp\probsurvive.dta", replace
	
** By age/sex/cohort **
* Rearrange the ONS cohort lx sheets into one row per birth year (dobyear) with
* one column per age (survive<age>), for males (sex=1) and females (sex=2).

	local sexcode = 0
	foreach sheetsex in Males Females {
		local ++sexcode
		local sfx = cond(`sexcode'==1, "m", "f")

		import excel using "$onsdata\ONS life tables 1841 onwards.xls", sheet("`sheetsex' cohort lx") cellrange(A9:DN120) firstrow clear
		rename ageyears age

		* Columns B-DN become cohort1900 ... cohort2016
		local yob = 1900
		foreach col of varlist B-DN {
			rename `col' cohort`yob'
			local ++yob
		}
		keep age cohort*

		* Long on birth year first, then wide on age
		reshape long cohort , i(age) j(dobyear)
		rename cohort survive
		reshape wide survive, i(dobyear) j(age)

		gen sex = `sexcode'
		save "$temp\cohort_mortality_`sfx'.dta", replace
	}

	* Females are in memory at this point: stack the male rows underneath
	append using "$temp\cohort_mortality_m.dta"

	save "$temp\cohort_mortality.dta", replace

***********************************
*** Step 2: Parental mortality  ***
***********************************
           
		   * Wave 6 core file: force all variable names to lower case and keep a
		   * temporary copy for the wave 6 merge later in this step
		   use $core6, replace
            rename *, lower
            tempfile wave6
            save `wave6'


            * Load the wave 1 parental items (HSE and ELSA questions) and respondent age.
            * NOTE(review): painhh, mainhh and indage look like abbreviations that resolve to
            * painhh1, mainhh1 and indager (the names used in the rename loop below) - confirm.
            use idauniq indage painhh mainhh dinfa difad dicdnf alivepab aagepab dinma dimad dicdnm alivemab aagemab using $core1, clear
            * Add the wave suffix 1. painhh1/mainhh1 temporarily become painhh11/mainhh11;
            * they are renamed back once the wave 6 merge is done.
            foreach var of varlist painhh1 mainhh1 dinfa difad dicdnf dinma dimad dicdnm indager {
                rename `var' `var'1
            }
            * Bring in the same items from waves 2-5 and suffix each with its wave number
            forv x=2/5 {
                merge 1:1 idauniq using ${core`x'}, keepusing(painhh mainhh dinfa difad dicdnf dinma dimad dicdnm indager)
                drop _m
                * Wave 2 only: strip the trailing 1 so the names line up with the other waves
                if `x'==2 {
                    rename painhh1 painhh
                    rename mainhh1 mainhh
                }
                * Wave 4 only: treat the stray -2 codes as -8
                if `x'==4 {
                    replace dinfa=-8 if dinfa==-2    /* GT: 72 cases coded "-2" for no apparent reason */
                    replace dinma=-8 if dinma==-2
                    replace difad=-8 if difad==-2
                    replace dimad=-8 if dimad==-2
                }
                foreach var of varlist painhh mainhh dinfa difad dicdnf dinma dimad dicdnm indager {
                    rename `var' `var'`x'
                }
            }
            * Wave 6 comes from the lower-cased temporary copy
            merge 1:1 idauniq using `wave6', keepusing(painhh mainhh dinfa difad dicdnf dinma dimad dicdnm indager)
            drop _m
            foreach var of varlist painhh mainhh dinfa difad dicdnf dinma dimad dicdnm indager {
                rename `var' `var'6
            }
            * Restore the wave 1 names now that nothing else can collide with them
            rename painhh11 painhh1
            rename mainhh11 mainhh1
            * Attach the explive variables from each wave's derived-variable file, suffixed _<wave>
            forv x=1/6 {
                merge 1:1 idauniq using ${dv`x'}, keepusing(explive?? explive100)
                rename explive75  explive75_`x'
                rename explive80  explive80_`x'
                rename explive85  explive85_`x'
                rename explive90  explive90_`x'
                rename explive95  explive95_`x'
                rename explive100 explive100_`x'
                drop _m
            }

            /* Parent vital status per wave: 1 = dead, 0 = alive, -8 = not known */
            forv x=1/6 {
                foreach who in father mother {
                    local e   = cond("`who'"=="father", "fa", "ma")     /* stem used in the ELSA items      */
                    local h   = cond("`who'"=="father", "pa", "ma")     /* stem used in HSE/household items */
                    local Who = proper("`who'")

                    gen `who'dead`x' = 1 if alive`h'b==2                /* HSE answers */
                    label var `who'dead`x' "`Who' dead"

                    /* ELSA answers from the current and all previous waves (if died already) */
                    forv y=1/`x' {
                        replace `who'dead`x' = 1 if din`e'`y'==2
                    }

                    /* ELSA answers from the current wave (if still alive) */
                    replace `who'dead`x' = 0 if din`e'`x'==1
                    replace `who'dead`x' = 0 if `h'inhh`x'==1
                }
            }
            forv x=1/6 {
                local next = `x'+1
                foreach who in father mother {
                    local e = cond("`who'"=="father", "fa", "ma")

                    /* ELSA answers from later waves (if still alive then, alive now) */
                    forv y=`next'/6 {
                        replace `who'dead`x' = 0 if `who'dead`x'==. & `who'dead`y'==0
                    }

                    /* Still unresolved and the current-wave item is a missing code */
                    replace `who'dead`x' = -8 if `who'dead`x'==. & inlist(din`e'`x',-9,-8,-1)
                }
            }

            /* Age at which each parent died (-1 = parent still alive, -8 = not known) */
            forv x=1/6 {
                foreach who in father mother {
                    local e = cond("`who'"=="father", "fa", "ma")
                    local h = cond("`who'"=="father", "pa", "ma")

                    if `x'==1 {
                        /* Wave 1 starts from the HSE response */
                        gen `who'diedage1 = aage`h'b if aage`h'b>0 & aage`h'b<.
                    }
                    else {
                        /* Later waves start from previous waves, the earliest non-missing answer winning */
                        gen `who'diedage`x' = .
                        forv y=`=`x'-1'(-1)1 {
                            replace `who'diedage`x' = `who'diedage`y' if `who'diedage`y'~=.
                        }
                    }

                    /* ELSA response from this wave (but exclude ages <15, which appear wrong) */
                    replace `who'diedage`x' = di`e'd`x' if `who'diedage`x'==. & di`e'd`x'>14 & di`e'd`x'<.
                }
            }

            /* Fill in from later waves if already dead */
            forv x=1/5 {
                local next = `x'+1
                foreach who in father mother {
                    forv y=`next'/6 {
                        replace `who'diedage`x' = `who'diedage`y' if `who'dead`x'==1 & `who'diedage`x'==. & `who'diedage`y'~=.
                    }
                }
            }

            /* Codes for the cases that are still empty */
            forv x = 1/6 {
                foreach who in father mother {
                    local e = cond("`who'"=="father", "fa", "ma")
                    local died    `who'diedage`x'
                    local raw     di`e'd`x'
                    local notaliv (`who'dead`x'==1|`who'dead`x'==-8)

                    replace `died' = -1 if `who'dead`x'==0 & `died'==.
                    replace `died' = -8 if `notaliv' & `died'==. & inlist(`raw',-1,-9,-8)
                    replace `died' = -8 if `notaliv' & `died'==. & ((`raw'>-1 & `raw'<15)|`raw'==-5)
                }
            }

            label define ageparentdied 1 "<50" 2 "50-59" 3 "60-64" 4 "65-69" 5 "70-74" 6 "75-79" 7 "80-84" 8 "85+" -1 "Still alive" -8 "Don't know"
            /* Band the age at death: irecode gives 0 for ages up to 49, 1 for 49-59, ..., 7 above 84 */
            forv x=1/6 {
                foreach p in pa ma {
                    local src = cond("`p'"=="pa", "fatherdiedage`x'", "motherdiedage`x'")
                    gen     age`p'died`x' = irecode(`src',49,59,64,69,74,79,84)+1 if `src'>0 & `src'<.
                    /* carry the still-alive and don't-know codes through unchanged */
                    replace age`p'died`x' = `src' if inlist(`src',-1,-8)
                    label values age`p'died`x' ageparentdied
                }
            }
            /* Blank everything for waves in which the respondent has no age recorded */
            forv x=1/6 {
                foreach var in fatherdead motherdead fatherdiedage motherdiedage agepadied agemadied {
                    replace `var'`x' = . if indager`x'==.
                }
            }
            label define parentsurvived 0 "Both parents died before current age+10" 1 "Only mother survived to current age+10" 2 "Only father survived to current age+10" 3 "Both parents survived to current age+10" 4 "At least one parent died before current age+10 (other unknown)" 5 "At least one parent survived to current age+10 (other unknown)" 6 "Both parents unknown"
            /* Classify by whether each parent lived to the respondent's current age plus 10. */
            /* Statements run in order, so a later match overrides an earlier one.            */
            forv x=1/6 {
                local cut      indager`x'+10
                local fa       fatherdiedage`x'
                local ma       motherdiedage`x'
                local fa_early (`fa'<`cut' & `fa'>0)
                local ma_early (`ma'<`cut' & `ma'>0)
                local fa_surv  ((`fa'>=`cut' & `fa'<.)|fatherdead`x'==0)
                local ma_surv  ((`ma'>=`cut' & `ma'<.)|motherdead`x'==0)

                gen parentsurvived`x' = .
                replace parentsurvived`x' = 0 if `fa_early' & `ma_early'
                replace parentsurvived`x' = 3 if `fa_surv' & `ma_surv'
                replace parentsurvived`x' = 1 if `fa_early' & `ma_surv'
                replace parentsurvived`x' = 2 if `ma_early' & `fa_surv'
                replace parentsurvived`x' = 4 if (`fa_early' & `ma'==-8)|(`ma_early' & `fa'==-8)
                replace parentsurvived`x' = 5 if (`fa_surv' & `ma'==-8)|(`ma_surv' & `fa'==-8)
                replace parentsurvived`x' = 6 if `ma'==-8 & `fa'==-8
                label val parentsurvived`x' parentsurvived
            }

            keep idauniq fatherdead? motherdead? age?adied? fatherdiedage? motherdiedage? parentsurvived?
            compress
            save "$temp\parentalmortality.dta", replace


/***********************************************************************************/
/* Step 3: Set up data with background info and subsequent mortality for each wave */
/***********************************************************************************/

forv x=1/7 {

	* first merge core data, derived vars, index file, mortality data, health cleaned file, date of birth, interview date and relevant ONS data *
	* Core file for this wave; wave is stored so later merges can key on it
    use "${core`x'}", replace
	gen wave  = `x'
    drop couple
	* Derived variables: every record must match in both directions
    merge 1:1 idauniq using "${dv`x'}"
    assert _m==3
    drop _m
	* Financial derived variables: same requirement
    merge 1:1 idauniq using "${findv`x'}"
    assert _m==3
    drop _m
	* For waves 1-5 any existing finstat variables are dropped before the index file is merged
	* NOTE(review): presumably finstatw<wave> then comes from the index file for these waves - confirm
    if `x' <6 drop finstat*
    merge 1:1 idauniq using "$index"
    if `x'<6 assert _m==2|_m==3 // index file goes up to wave 5
    keep if _m==3
    drop _m
	* Mortality data: keep everyone in memory, drop people found only in the mortality file
    merge 1:1 idauniq using "$derivedmortality" 
    drop if _m==2
    drop _m
	* Cleaned health data is keyed on person and wave; unmatched health records are dropped
    merge 1:1 idauniq wave using "$healthdata"
	keep if _m==1|_m==3
    drop _m
	* Date of birth: everyone in memory must be found in the dob file
    merge 1:1 idauniq using "$dob", keepusing(dob)
    assert _m==3|_m==2
    keep if _m==3
    drop _m
	* Interview date variables for this wave (names ending in w<wave>)
	merge 1:1 idauniq using "$intdat", keepusing(*w`x')
	assert _m==3|_m==2
	drop if _m == 2
	drop _m
	* Wave 1 uses a different variable name; harmonise and then drop the wave suffix
	if `x' == 1 rename intdthw1 intdtiw1
	rename intdtiw`x' intdtiw
	* ONS survival probabilities by age, sex and wave
	* NOTE(review): this merge uses age as it stands here, before the DoB-based cleaning further down
	merge m:1 age sex wave using "$temp\probsurvive.dta"
    drop if _m==2
    drop _m
	* Keep a single finstat for this wave and drop the wave-specific versions
    gen finstat=finstatw`x'
    drop finstatw?
	
	*** Clean age using DoB *** 
	* Age in completed years on the interview date: difference in calendar years,
	* less one if the birthday has not yet been reached in the interview year
	gen calc_age = yofd(intdtiw) - yofd(dob)
	replace calc_age = calc_age - 1 if month(intdtiw) < month(dob)
	replace calc_age = calc_age - 1 if month(intdtiw) == month(dob) & day(intdtiw) < day(dob)
	replace age = calc_age if age == 99 // 99 is coded as if missing
	gen age_discrep = age  - calc_age
	* some info on when discrepancies arise *
	tab age_discrep if age!=calc_age
	tab dob if age!=calc_age
	gen month_diff = month(intdtiw) - month(dob)
	gen day_diff = day(intdtiw) - day(dob)
	tab month_diff if age!=calc_age
	tab day_diff if age!=calc_age
	** replace age with calculated age when more than a year's difference **
	* The upper test used to read age - calc_age > -1, which overwrote age on every
	* discrepancy except -1 and set age to missing whenever calc_age was missing.
	* Now age is replaced only when it is more than one year out in either direction
	* (or missing itself) and a calculated age is available.
	replace age = calc_age if abs(age - calc_age) > 1 & !missing(calc_age)
	
    sort idauniq
	
    /* Create useful variables */

    ***Whether reach age asked about by end of February 2013***
    * Latest qualifying date of birth for each current-age band; where bands
    * overlap the later statement wins
    tempvar dobcut
    gen     `dobcut' = mdy(2,15,1938) if age<66
    replace `dobcut' = mdy(2,15,1933) if age>65 & age<70
    replace `dobcut' = mdy(2,15,1928) if age>69 & age<75
    replace `dobcut' = mdy(2,15,1923) if age>74 & age<80
    gen reachage = dob<=`dobcut' if `dobcut'<.
    drop `dobcut'

    ***Whether die before age 75 / 85***
    foreach a in 75 85 {
        gen date`a' = dob+365.25*`a'
        format date`a' %tdD_m_Y
        * Death is dated to the 15th of the recorded month
        gen     dead`a' = date`a'>mdy(dodmnth,15,dodyr) if dob~=.
        replace dead`a' = 0 if date`a'<=mdy(2,15,2013) & date`a'< mdy(dodmnth,15,dodyr) & dob~=.
        replace dead`a' = 0 if dob~=. & dead`a'==. & dodmnth==.
        gen survive`a' = 1-dead`a' if dead`a'~=.
        label var date`a' "Date of `a'th birthday"
        label var dead`a' "Died before age `a'"
        label var survive`a' "Survive to age `a'"
    }

    ***Reporting styles for probabilities***
    * One flag per expectation question: 1 when the recorded code is not negative
    local flags rain insuf work inh beq live
    local items exrain exrslf expw exainh excin explive
    forv k=1/6 {
        local flag : word `k' of `flags'
        local item : word `k' of `items'
        gen answer_`flag' = `item'>=0
    }
    gen answer_all = answer_rain==1 & answer_insuf==1 & answer_inh==1 & answer_beq==1 & answer_live==1 & (answer_work==1|expw==-1)
    * Blank every answer flag for proxy interviews and where exrain is coded -1
    foreach var of varlist answer* {
        replace `var' = . if proxy==1 | exrain==-1
    }

    * Number of the six questions with a valid answer
    gen numanswer = 0
    foreach item of local items {
        replace numanswer = numanswer + (`item'>=0 & `item'<.)
    }

    * How many of the six answers are exactly 0, 50 or 100 ...
    foreach y in 0 50 100 {
        gen num`y' = (exrain==`y') + (exrslf==`y') + (expw==`y') + (exainh==`y') + (excin==`y') + (explive==`y')
    }
    * ... and the same counts leaving out explive
    foreach y in 0 50 100 {
        gen numoth`y' = (exrain==`y') + (exrslf==`y') + (expw==`y') + (exainh==`y') + (excin==`y')
    }
    gen numfocal = num0+num50+num100
    gen numothfocal = numoth0+numoth50+numoth100
    gen numothfocal2 = numoth0+numoth100

    * Average over the answers given (computed before negative codes are blanked)
    gen mean = (exrain*answer_rain+exrslf*answer_insuf+expw*answer_work+exainh*answer_inh+excin*answer_beq+explive*answer_live)/numanswer
    foreach item of local items {
        replace `item' = . if `item'<0
    }
    gen min = min(exrain,exrslf,expw,exainh,excin,explive)
    gen max = max(exrain,exrslf,expw,exainh,excin,explive)
    gen range = max-min
    gen focalsurvive = inlist(explive,0,50,100) if explive>=0
    gen focalsurvive2 = inlist(explive,0,100) if explive>=0

    * Six-way grouping of the reported survival chance, with a DK category
    gen     probsurv_cat = 1 if inlist(exlo80,-8,-9)
    replace probsurv_cat = 2 if exlo80==0
    replace probsurv_cat = 3 if exlo80>0 & exlo80<50
    replace probsurv_cat = 4 if exlo80==50
    replace probsurv_cat = 5 if exlo80>50 & exlo80<100
    replace probsurv_cat = 6 if exlo80==100
    label define probsurv_cat 1 "DK" 2 "0" 3 "1\textendash{}49" 4 "50" 5 "51\textendash{}99" 6 "100"
    label val probsurv_cat probsurv_cat
    label var probsurv_cat "Self-reported probability of surviving (%)"

    * Twelve-way grouping: each rule is appended to the variable name to form the condition
    gen probsurv_cat2 = .
    local k = 0
    foreach rule in "==0" "> 0 & exlo80<=10" ">10 & exlo80<=20" ">20 & exlo80<=30" ">30 & exlo80<50" "==50" ">50 & exlo80<=60" ">60 & exlo80<=70" ">70 & exlo80<=80" ">80 & exlo80<=90" ">90 & exlo80<100" "==100" {
        local ++k
        replace probsurv_cat2 = `k' if exlo80`rule'
    }
    label define probsurv_cat2 1 "0" 2 "1-10" 3 "11-20" 4 "21-30" 5 "31-49" 6 "50" 7 "51-60" 8 "61-70" 9 "71-80" 10 "81-90" 11 "91-99" 12 "100"
    label val probsurv_cat2 probsurv_cat2
    label var probsurv_cat2 "Self-reported probability of surviving (%)"

    * Chance of dying: the same twelve groups in reverse order
    gen probdie_cat2 = 13-probsurv_cat2
    label define probdie_cat2 12 "100" 11 "90-99" 10 "80-89" 9 "70-79" 8 "51-69" 7 "50" 6 "40-49" 5 "30-39" 4 "20-29" 3 "10-19" 2 "1-9" 1 "0"
    label val probdie_cat2 probdie_cat2

    * Eight-way grouping
    gen probsurv_cat3 = .
    local k = 0
    foreach rule in "<=20 & exlo80>=0" ">20 & exlo80<50" "==50" ">50 & exlo80<=60" ">60 & exlo80<=70" ">70 & exlo80<=80" ">80 & exlo80<100" "==100" {
        local ++k
        replace probsurv_cat3 = `k' if exlo80`rule'
    }
    label define probsurv_cat3 1 "<=20" 2 "21-49" 3 "50" 4 "51-60" 5 "61-70" 6 "71-80" 7 "81-99" 8 "100"
    label val probsurv_cat3 probsurv_cat3
    label var probsurv_cat3 "Self-reported probability of surviving (%)"

    * Chance of dying: the same eight groups in reverse order
    gen probdie_cat3 = 9-probsurv_cat3
    label define probdie_cat3 8 ">=80" 7 "51-79" 6 "50" 5 "40-49" 4 "30-39" 3 "20-29" 2 "1-19" 1 "0"
    label val probdie_cat3 probdie_cat3

    * Four-way grouping: DK, 0, in between, 100
    gen     probsurv_cat4 = 1 if inlist(exlo80,-8,-9)
    replace probsurv_cat4 = 2 if exlo80==0
    replace probsurv_cat4 = 3 if exlo80>0 & exlo80<100
    replace probsurv_cat4 = 4 if exlo80==100
    label define probsurv_cat4 1 "DK" 2 "0%" 3 "Between 0% and 100%" 4 "100%"
    label val probsurv_cat4 probsurv_cat4
    label var probsurv_cat4 "Expected chance of surviving at age 75/80"

    * From wave 3 onwards the same four-way grouping is built from exlo90
    if `x'>2 {
        gen     prob85_cat = 1 if inlist(exlo90,-8,-9)
        replace prob85_cat = 2 if exlo90==0
        replace prob85_cat = 3 if exlo90>0 & exlo90<100
        replace prob85_cat = 4 if exlo90==100
        label val prob85_cat probsurv_cat4
        label var prob85_cat "Expected chance of surviving to age 85"
    }

    * Indicators for non-response and for each focal answer, undefined where exlo80 is -1
    gen plive75_noresp = inlist(exlo80,-8,-9) if exlo80~=-1
    foreach v in 0 50 100 {
        gen plive75_`v' = exlo80==`v' if exlo80~=-1
    }

    ***Socioeconomic variables***
    * Negative codes on the education and numeracy measures become missing
    foreach v in schleave numtype5 numtype4 {
        replace `v' = . if `v'<0
    }

    gen single = marstat>2 if marstat<.
    label define single 0 "Married/cohabiting" 1 "Single"
    label val single single
    label var single "\addlinespace \textit{Marital status}"

    label var ageg5 "Age group"
    label define ageg5 1 "50\textendash{}54" 2 "55\textendash{}59" 3 "60\textendash{}64" 4 "65\textendash{}69" 5 "70\textendash{}74" 6 "75\textendash{}79" 7 "80\textendash{}84", modify

    * Three-band age group for those under 66; ages 64 and 65 are forced into the top band
    gen agegroup = ageg5 if age<66
    replace agegroup = 3 if age==65|(age==64 & agegroup~=3)
    label define agegroup 1 "50\textendash{}54" 2 "55\textendash{}59" 3 "60\textendash{}65"
    label val agegroup agegroup
    label var agegroup "Age:"

    label define sex 1 "\textbf{Men}" 2 "\textbf{Women}", modify
    label var sex "Sex"
    label define schleave 0 "Low" 1 "Mid" 2 "High", modify
    label var schleave "\addlinespace \textit{Education level}"
    label define numtype5 1 "Worst" 2 "2" 3 "3" 4 "4" 5 "Best", modify
    label var numtype5 "Numerical ability:"
    label define numtype4 1 "Worst" 2 "2" 3 "3" 4 "Best", modify
    label var numtype4 "\addlinespace \textit{Numerical ability}"

    gen working = inlist(wpactw,1,2,3)
    label var working "Working"
	
	
	*** Create cohort bands ***
	* Five-year birth cohorts numbered 0 (1900-1904) to 20 (2000-2004)
	gen cohort5 = .
	label define cohort5 -1 "Missing"
	forv low = 1900(5)2000 {
		local c    = (`low'-1900)/5
		local high = `low'+4
		replace cohort5 = `c' if inrange(dobyear,`low',`high')
		label define cohort5 `c' "Born `low'-`high'", add
	}
	label values cohort5 cohort5
	
    ***Income and wealth***
    * Quintiles of income (eqtotinc_bu_s) and wealth (nettotw_bu_s), computed
    * separately within each ageg5 band. xtile is called in its documented form,
    * with the if qualifier before the options, and uses a temporary variable so
    * it cannot collide with an existing variable called temp.
    foreach q in income wealth {
        local src = cond("`q'"=="income", "eqtotinc_bu_s", "nettotw_bu_s")
        gen `q'q = .
        forv band=1/8 {
            tempvar qtile
            xtile `qtile' = `src' if ageg5==`band', nq(5)
            replace `q'q = `qtile' if `q'q==.
            drop `qtile'
        }
    }
    label var incomeq "Income quintile:"
    label define incomeq 1 "Lowest" 2 "2" 3 "3" 4 "4" 5 "Highest"
    label val incomeq incomeq
    label var wealthq "Wealth quintile:"
    * The wealth quintiles share the income value label
    label val wealthq incomeq

    ***Health***
    * Alcohol frequency in five bands; wave 1 is built from heala, later waves from scako
    if `x'==1 {
        gen     alcohol = 1 if heala==1|heala==2
        replace alcohol = 2 if heala==3
        replace alcohol = 3 if heala==4
        replace alcohol = 4 if heala==5
        replace alcohol = 5 if heala==6
    }
    if `x'~=1 {
        gen     alcohol = 1 if scako==1|scako==2|scako==3
        replace alcohol = 2 if scako==4
        replace alcohol = 3 if scako==5
        replace alcohol = 4 if scako==6|scako==7
        replace alcohol = 5 if scako==8
    }
    label define alcohol 1 "At least 3-4 days a week" 2 "Once or twice a week" 3 "Once or twice a month" 4 "A few times a year" 5 "Not at all"
    label val alcohol alcohol
    label var alcohol "Alcohol consumption:"

    replace smokerstat = . if smokerstat<0
    label define smokerstat 0 "Non-smoker" 1 "Former occasional smoker" 2 "Former regular smoker" 3 "Former smoker, DK frequency" 4 "Current smoker", modify
    label var smokerstat "Smoking behaviour:"

    * Wave 3 takes self-reported health from srh_hse. The substitution is done
    * BEFORE negative codes are blanked; it used to come afterwards, which left
    * the negative codes of srh_hse in the wave 3 data only.
    * NOTE(review): confirm srh_hse uses the same 1-5 coding as srh_hrs.
    if `x'==3 replace srh_hrs=srh_hse
    replace srh_hrs = . if srh_hrs<0
    label var srh_hrs "Self-reported health:"
    label define srh_hrs 1 "Excellent" 2 "Very good" 3 "Good" 4 "Fair" 5 "Poor", modify

    * Word recall: immediate, delayed and total; the _dk flags mark codes below -1
    gen irecall = cflisen if cflisen>=0 & cflisen<.
    gen drecall = cflisd  if cflisd>=0  & cflisd<.
    gen recall  = cflisen+cflisd if cflisen>=0 & cflisen<. & cflisd>=0 & cflisd<.
    gen irecall_dk = (cflisen<-1)
    gen drecall_dk = (cflisd<-1)
    label var irecall "Immediate recall - number of words"
    label var drecall "Delayed recall - number of words"
    label var recall "Word recall - total number"

    * Any of the three heart-condition indicators
    gen heart = everhm==1|everhf==1|evermi==1

    ***Mortality***
    * Death and interview are both dated to the 15th of the recorded month
    gen dead2yr = mdy(dodmnth,15,dodyr)-mdy(intdatm,15,intdaty)<365*2 if dodmnth~=.
    replace dead2yr = 0 if dodmnth==.
	* The outcome is unknown when the two-year window runs past the end of the
	* mortality data, i.e. for interviews from March 2011 onwards. The month test
	* used to be <= 3, which blanked January-March 2011 but kept later 2011
	* interviews, the reverse of what a fixed cut-off requires.
	* NOTE(review): assumes deaths are observed to mid-February 2013, the date used for dead75 and dead85 - confirm.
	replace dead2yr = . if intdaty > 2011 | (intdaty == 2011 & intdatm >= 3)
    label var dead2yr "Died within 2 years of interview"
    * The ten-year outcome is only built for wave 1
    if `x'==1 {
        gen dead10yr = mdy(dodmnth,15,dodyr)-mdy(intdatm,15,intdaty)<365*10 if dodmnth~=.
        replace dead10yr = 0 if dodmnth==.
        label var dead10yr "Died within 10 years of interview"
    }

    sort idauniq
    save "$temp\wave`x'base.dta", replace
}


/**********************************************/
/* Step 4: Merge in data on when parents died */
/**********************************************/
* Parental mortality in long form: one row per person and wave
use "$temp\parentalmortality.dta", replace
reshape long fatherdead motherdead fatherdiedage motherdiedage agepadied agemadied parentsurvived, i(idauniq) j(wave)
tempfile parents
save `parents'

forv x=1/7 {
    use "$temp\wave`x'base.dta", replace
    merge 1:1 idauniq wave using `parents'
    if `x'==7 {
        * Wave 7: unmatched base records are kept as well
        keep if inlist(_m,1,3)
    }
    else {
        * Other waves: every base record must match; unmatched parent rows are dropped
        assert inlist(_m,2,3)
        keep if _m==3
    }
    drop _m
    * Move the still-alive and don't-know codes to the end of the scale
    foreach v in agepadied agemadied {
        recode `v' (-1=9) (-8=10)
    }
    label define ageparentdied 1 "$ <$50" 2 "50\textendash{}59" ///
                               3 "60\textendash{}64" 4 "65\textendash{}69" ///
                               5 "70\textendash{}74" 6 "75\textendash{}79" ///
                               7 "80\textendash{}84" 8 "85+" ///
                               9 "Still alive" 10 "Don't know", modify
    label var agepadied "Age father died:"
    label var agemadied "Age mother died:"
    sort idauniq
    save "$temp\wave`x'base.dta", replace
}

/*************************************************************/
/* Step 5: Merge in information about partner			     */
/*************************************************************/

	forv x = 1/7 {

		* Build a look-up file of each person's own answers, renamed with a _p
		* suffix so it can be attached to their partner's record
		use "$temp\wave`x'base.dta", clear

		* exlo90 is only carried from wave 3 onwards
		local extra
		if `x' >= 3 local extra exlo90
		keep idauniq exlo80 `extra' cohort5 dobyear prob* age

		rename (idauniq exlo80 cohort5 dobyear age) (idauniq_p exlo80_p cohort5_p dobyear_p age_p2)
		if `x' >= 3 rename exlo90 exlo90_p
		forv a = 50/110 {
			rename prob`a' prob`a'_p
		}

		save "$temp\partner_info_merge_w`x'.dta", replace

		* Attach the partner's values via idauniq_p; look-up rows with no match are discarded
		use "$temp\wave`x'base.dta", clear
		merge m:1 idauniq_p using "$temp\partner_info_merge_w`x'.dta", keep(master match) nogenerate

		save "$temp\wave`x'base.dta", replace
	}

/**********************************************/
/* Step 6: Append all files into 1			  */
/**********************************************/

	* Stack the seven wave files; from wave 4 on the append is forced
	use "$temp\wave1base.dta", replace
	forv w = 2/7 {
		if `w' <= 3 {
			qui append using "$temp\wave`w'base.dta"
		}
		else {
			qui append using "$temp\wave`w'base.dta", force
		}
	}

	* generate variable for wealth quint when first observed above age 60 *
	* (people never observed at 60 or over use their oldest observed age instead)
	sort idauniq wave
	tempvar lastage age60plus refage wq_at_ref
	bys idauniq : egen `lastage' = max(age)
	gen `age60plus' = age if age >=60
	bys idauniq : egen `refage' = min(`age60plus')
	replace `refage' = `lastage' if `refage' == .
	gen `wq_at_ref' = wealthq if age == `refage'
	bys idauniq : egen wealthq2 = min(`wq_at_ref')
	drop `lastage' `age60plus' `refage' `wq_at_ref'

	save "$savedata\wave1-7base.dta", replace
